#!/usr/bin/env python
# -*- coding:utf-8 -*- 
import urllib2
import re
import pickle
from find_qq_pic import*
def get_blog_url():
    """Collect article links from the blog's list pages and pickle them.

    Downloads list pages 1 through 7 of the hard-coded Sina blog article
    index, extracts every double-quoted URL starting with
    ``http://blog.sina.com.cn/s/blog`` from each page, and writes the
    combined list (in page order, duplicates kept) to ``sina_zhengmei.pic``
    in the current working directory.

    Returns:
        None. The result is only available through the pickle file.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` when a page
            cannot be fetched; no file is written in that case.
    """
    lst_url = []
    for page in xrange(1, 8):
        url = "http://blog.sina.com.cn/s/articlelist_1578964772_0_%s.html" % page
        print(url)
        print('----------------------------------')
        response = urllib2.urlopen(url)
        try:
            body = response.read()
        finally:
            # Release the connection even if the read fails part-way.
            response.close()
        # Non-greedy match: each link ends at the first closing quote.
        li = re.findall(r'"(http://blog.sina.com.cn/s/blog[\s\S]*?)"', body)
        lst_url.extend(li)
        print(li)
    # The context manager closes the file even if pickling raises.
    with open('sina_zhengmei.pic', 'w') as f:
        pickle.dump(lst_url, f)

def find_sina_pic(url):
    """Download one blog post and extract its title and image URLs.

    Args:
        url: Address of the blog post page to fetch.

    Returns:
        A ``(lst_img, title)`` tuple.

        ``lst_img`` is a list of ``('', '', image_url)`` tuples, one for
        each ``real_src ="..."`` attribute whose URL contains
        ``.photo.sina.com``. The two leading empty strings are placeholders
        whose meaning is defined by the consumer (presumably ``data_site``
        -- TODO confirm).

        ``title`` is the text of the first ``<title>`` element up to the
        first ``-``, with the fixed blog suffix removed. It is ``''`` when
        the page contains no ``<title>`` element.

    Raises:
        urllib2.URLError: propagated when the page cannot be fetched.

    Side effects:
        Installs a cookie-aware opener as the process-wide ``urllib2``
        default via ``urllib2.install_opener``.
    """
    # Imported locally so the function does not rely on ``cookielib``
    # leaking into this module through a star import.
    import cookielib

    print('start')
    cj = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    # Kept from the original: other code may rely on the global opener.
    urllib2.install_opener(opener)
    headers = {
        'User-Agent': 'Mozilla/5.0 Firefox/3.6.8',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Encoding': 'deflate',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '115',
        'Connection': 'keep-alive',
        'Cache-Control': 'no-cache',
    }
    print('start request url')
    req = urllib2.Request(url, headers=headers)
    response = opener.open(req)
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails part-way.
        response.close()
    print('%s read html ok' % len(html))

    titles = re.findall(r'<title>([\s\S]*?)</title>', html)
    # A page without <title> used to raise IndexError; fall back to ''.
    title = titles[0].split('-')[0] if titles else ''
    title = re.sub('\。（组图）_色娃_新浪博客', '', title)

    lst_img_url = re.findall(
        r'real_src ="(http://[\s\S]*?\.photo\.sina\.com[\s\S]*?)"', html)
    # NOTE: an earlier revision dropped the last two matches; that step was
    # already disabled, so every match is returned.
    lst_img = [('', '', img_url) for img_url in lst_img_url]
    return lst_img, title

if __name__ == '__main__':
    # Script entry point: load the pickled list of post URLs, skip the
    # leading entries, then scrape each remaining post and pass its images
    # to data_site().
    import traceback

    # get_blog_url()  # uncomment to rebuild 'sina_zhengmei.pic' first

    # Number of leading URLs to skip -- presumably posts handled by an
    # earlier run; TODO confirm before reusing this value.
    SKIP_COUNT = 142

    # NOTE(review): unpickling can execute arbitrary code. Only load a
    # file produced locally by get_blog_url().
    with open('sina_zhengmei.pic') as f:
        lst_url = pickle.load(f)

    # Slicing drops the prefix in one step and yields an empty list (not an
    # IndexError) when fewer than SKIP_COUNT URLs are stored.
    lst_url = lst_url[SKIP_COUNT:]

    # Fixed metadata sent with every post; the original had interactive
    # raw_input prompts for these values, left disabled.
    tag = u'正妹'
    catalog = u'美女'
    name = ''

    for num, blog_url in enumerate(lst_url, 1):
        print(blog_url)
        lst_img, title = find_sina_pic(blog_url)
        try:
            data_site(lst_img, title, name, tag, catalog)
        except Exception:
            # Best effort: report the failure and continue with the next
            # post. Unlike a bare ``except``, Ctrl-C still stops the run.
            traceback.print_exc()
            print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
            print('process %s failed' % blog_url)
        else:
            print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
            print('process %s work well' % blog_url)
        print('===========================================')
        print(num)
	


